*-----------------------------------------------------------------
*Written by Giri Subramaniam, date: My 4th, 2017
*-----------------------------------------------------------------
*Code generates balance sheet & income expenditure data in the second Townsend paper using the SIPP dataset.
*for CURRENT YEAR
*-----------------------------------------------------------------
/*FILE PATH: /shared/sdsjxs/Townsend/scripts/sipp_clean.do
VARIABLES DEFINITIONS:
ssuid: Sample Unit Identifier
epppnum: Person number
personid: Uniquely identifies a person
swave: Wave of data collection
spanel: Sample Code - Indicates Panel Year
shhadid: Current Address ID
eentaid: Entry Address ID

OTHER VARIABLES DESCRIPTIONS:
*refpd => Reference Period (4 months)
*month => Monthly question
*Person weight: wpfinwgt
*Household weight: whfnwgt */

/* Note on household level variables:
Some variables are divided by two and added in the head of household's and spouse's records. 
Some "household level" variables are simply duplicated for each member in the household. 
To facilitate easy aggregation, I divide the latter variables by the number of members in each household, 
and record this against each household member. 
A technical point: In the latter cases the condition "if tage>15" should not be used. */
*-----------------------------------------------------------------

clear all
set more off, perm
cap log close

*Input
*local in_path "/shared/sdsjxs/Townsend/rawdata/sipp"
*cd in_path
cd "/shared/sdsjxs/Townsend/rawdata/sipp"
*Output
global out_path "/shared/sdsjxs/Townsend/results"
local data_gen no

******************************************************************************************************
***************************************************** CORE MODULES *************************************
********************************************************************************************************
*-----------------------------------------------------------------
* CORE Module (Wave 10): CURRENT Year => Sep 2011 - Dec 2011
* File name: SIPP_2008_Panel_Wave_10-Core_File.pdf
* INCOME & EXPENDITURE
*-----------------------------------------------------------------
*LOAD SIPP 2008 Wave 10
use sipp08w10.dta, clear

*Household & Person weights.
gen perweight 	= wpfinwgt
gen hhweight 	= whfnwgt

tempfile start
save `start', replace

********************************* 3. Income ************************************
*(Note: some of these variables are "reaggregated" - caution: double counting)

**AUDIT: The thtotinc variable is total household income. Taking a properly
* weighted average gives ~70k. There's no reason to not have this
* as the top-line number, regardless of how the categories end up breaking
* out. So I will drop anything that doesn't add up in the "other income"
* category.

**AUDIT (by Giri on May 1st, 2017): Variables totalgovtran, totalpropinc and totalotherinc 
* were made annual. Before they were still monthly.

***AUDIT: Rewritten by Jason
gen exclude = tage<15
gen totalhhinc = thtotinc*12
gen totalearninc = thearn*12

*Caution: Don't include foodstamps, since it's already annual; make everything else annual; add foodstamps
egen govtran = rowtotal(thsocsec thssi thunemp thvets thtrninc thafdc) if (!missing(thsocsec) | !missing(thssi) | !missing(thunemp) | !missing(thvets) | !missing(thfdstp) | !missing(thtrninc) | !missing(thafdc))
replace govtran = govtran*12
*Add annual value of foodstamps to this.
egen totalgovtran = rowtotal(govtran thfdstp)
drop govtran

gen totalpropinc = thprpinc*12

gen totalotherinc = thothinc*12

collapse (mean) total* hhweight, by(ssuid shhadid)

qui describe, varlist

collapse (mean) total* (median) med_income=totalhhinc [pw = hhweight]
xpose, varname clear
rename _varname varname
order varname
replace v1=round(v1)
export excel using "$out_path/balance_sheet.xlsx", sheet("sipp_income_2011", replace)
clear

/*

gen totalhhinc_yr = thtotinc*12 if tage>=15
gen otherinc_yr = totalhhinc_yr

*Total person earned income per month (REAGGREGATED) - excluded
**AUDIT: This matches our general compensation of employees category,
* so I will use it.
gen totalinc_earned = tpearn*12 if tage>=15
replace otherinc_yr = otherinc_yr - totalinc_earned if !missing(totalinc_earned)

*Total income from moonlighting / extra jobs
*gen totinc_moon = tmlmsum*12 if tage>=15

*Gross pay: Earnings from job - 1st & 2nd
*forvalues i=1(1)2 {
*	gen totinc_job`i'= tpmsum`i'*12 if tage>15
*}

*egen totalcompensation 	= rowtotal(totinc_moon totinc_job?)
*drop totinc_moon totinc_job?

*Total person Income (REAGGREGATED) - excluded
*gen totinc_total= tptotinc*12 if tage>15

*Business Income - 1st & 2nd
forvalues i=1(1)2 {
	gen businc_`i' 	= tbmsum`i'*12 if tage>=15
}

*Net profit/loss per reference period - 1st & 2nd

*AUDIT: This should not be included. Business income will include
* any profit. This is either double counting or unnecessary counting
* (depending on whether it's positive or negative)

*forvalues i=1(1)2 {
*	gen profitloss`i'= tprftb`i'*3 if tage>15
*}

egen totalincbus= rowtotal(businc*) if !missing(businc_1) | !missing(businc_2)
drop businc*
replace otherinc_yr = otherinc_yr - totalincbus if !missing(totalincbus)


** AUDIT: According to the documentation, total property income includes
* all this stuff. Might as well just use that variable.
*Gross rent (refpd) - Joint
*gen grorent_j 	= tjarnt*3 if tage>15
*Gross rent (refpd) - Own
*gen grorent_o 	= toarnt*3 if tage>15
*Net rent (refpd) - Joint
*gen netrent_j	= tjaclr*3 if tage>15 
// WE NEED GROSS
*Net rent (refpd) - Own
*gen netrent_o	= toaclr*3 if tage>15 
// WE NEED GROSS
*Total OTHER property income (sum of tjaclr toaclr tjaclr2 tmijnt tmiown trndup1 trndup2)
/* tjaclr, tjaclr2: net income/loss from property; joint w. spouse
toaclr: net income from own rental property (-)
tmijnt: interest paid on mortgage; joint w. spouse (+)
tmiown: interest paid on own mortgage (+)
rndup1, trndup2: royalty income (+)
*/


*gen totalpropinc 	= tothprop*3 if tage>=15
gen totalpropinc = thprpinc*12 if tage>=15
replace otherinc_yr = otherinc_yr - totalpropinc if !missing(totalpropinc)
*SUBTRACT NET RENT
*gen minusnetrent_j= -netrent_j
*gen minusnetrent_o= -netrent_o

*egen totalrent 	= rowtotal(grorent_j grorent_o)
*drop grorent_j grorent_o

*Interest on Mortgage & Royalty income
**AUDIT: Not sure what was going on here... Where did royalties come from?
*egen mortg_royalty 	= rowtotal(totprop minusnetrent_j minusnetrent_o)
*drop totprop netrent* minusnetrent_j minusnetrent_o

*Total Interest income per month (refpd)
gen intincome 	= tintinc*3 if tage>=15
*Total Dividend income (refpd)
gen totdivinc 	= tdivinc*3 if tage>=15

egen totalintdiv 	= rowtotal(intincome totdivinc)
drop intincome totdivinc

replace otherinc_yr = otherinc_yr - totalintdiv if !missing(totalintdiv)

*Total property (asset) income per month (REAGGREGATED) - excluded
*gen tincmonth 	= tpprpinc*12 if tage>15

*Penson plans distribution per month (REAGGREGATED) - excluded
*gen pensioninc 	= tppndist*12 if tage>15
*Retired lump sum payments per month (REAGGREGATED) - excluded
*gen retlmpsm 	= tplumpsm*12 if tage>15

*egen totinc_ret = rowtotal(pensioninc retlmpsm)
*drop pensioninc retlmpsm

*Total cash transfer per month (REAGGREGATED) - exclude
*gen totalcashtrans 	= tptrninc*12 if tage>=15
*Total Other income per month (REAGGREGATED)  - exclude
*gen otherincome	= tpothinc*12 if tage>15

*egen totinc_other= rowtotal(totalcashtrans otherincome)
*drop cashtrans otherincome

*** AUDIT: Better to use the aggregated government assistance variable
gen totalgovinc = thafdc*12 if tage>=15
replace otherinc_yr = otherinc_yr - totalgovinc if !missing(totalgovinc)
/*
*------ GOVT. TRANSFERS FROM HERE
*Social security (adult) amount per month
gen gov1 	= t01amta*12 if tage>15
*Social security (child) amount per month
gen gov2 = t01amtk*12 if tage>15
*Railroad retirement per month
gen gov3 	= t02amt*12 if tage>15
*Federal SSI (adult)
gen gov4 	= t03amta*12 if tage>15
*Federal SSI (child)
gen gov5 	= t03amtk*12 if tage>15
*State SSI
gen gov6 	= t04amt*12 if tage>15
*State unemployment compensation
gen gov7 	= t05amt*12 if tage>15
*Supplemental unemployment benefits
gen gov8 	= t06amt*12 if tage>15
*Veteran's compensation
gen gov9 	= t08amt*12 if tage>15
*Disability insurance receipt
gen gov10 	= t13amt*12 if tage>15
*Public assistance payments
gen gov11 	= t20amt*12 if tage>15
*General assistance
gen gov12 	= t21amt*12 if tage>15
*Other welfare
gen gov13 	= t24amt*12 if tage>15
*WIC payments
gen gov14 	= t25amt*12 if tage>15
*Food stamps
gen gov15 	= t27amt*12 if tage>15
*Federal Civil Service pension (refpd)
gen gov16 	= t31amt*3 if tage>15
*Military retirement pay amount
gen gov17 	= t32amt*3 if tage>15
*State govt. pension
gen gov18 	= t34amt*3 if tage>15
*Local govt. pension
gen gov19 	= t35amt*3 if tage>15
*Other govt. income (month)
gen gov20 = t75amt*12 if tage>15
*------ END OF GOVERNMENT TRANSFERS

egen totalgovtrans 	= rowtotal(gov?)
drop gov? gov??

*AUDIT: No need to worry about this level of disaggregation.
*From business
*Severance pay
gen otherbusinc1 = t15amt*12 if tage>15
*Pension from company
gen otherbusinc2 = t30amt*3 if tage>15
*Worker's compensation
gen otherbusinc3 = t10amt*12 if tage>15
*Employer disability payments
gen otherbusinc4 = t14amt*12 if tage>15
*Transportation vouchers
gen otherbusinc5 = t60amtg*3 if tage>15
*Transportation-assistance subway tokens
gen otherbusinc6 = t60amtt*3 if tage>15
*Food assistance
gen otherbusinc7 = t61amt*3 if tage>15
*Clothing assistance
gen otherbusinc8 = t62amt*3 if tage>15

egen totalotherbusiness = rowtotal(otherbusinc?)
drop otherbusinc?
*/

*From persons
*Foster child care payments
gen otherpersinc1 = t23amt*12 if tage>=15
*Child support payments (4-month period)
gen otherpersinc2 = t28amt*3 if tage>=15
*Alimony payments
gen otherpersinc3 = t29amt*3 if tage>=15
*Child support received by agency on behalf of HH
gen otherpersinc4 = tcsagy*3 if tage>=15

egen totalotherperson = rowtotal(otherpersinc?)
drop otherpersinc?
replace otherinc_yr = otherinc_yr - totalotherperson if !missing(totalotherperson)

*AUDIT: Automatically captured now.
/*Other
*Lump sum payments received
gen allother1 	= t52amt*3 if tage>15
*Amount received from relatives/friends
gen allother2 	= t51amt*3 if tage>15
*Paid-up life insurance
gen allother3 	= t36amt*3 if tage>15
*Other retirement, disability or survivor
gen allother4 = t38amt*3 if tage>15
*Casual earnings
gen allother5	= t55amt*3 if tage>15
*Misc cash income
gen allother6 = t56amt*3 if tage>15
*Short-term cash assistance
gen allother7 	= t64amt*3 if tage>15
*Pension/Retirement lumpsums
*gen allother8 = t39amt*3 if t39amt!=0 // check if asset or not


egen totalotherincome = rowtotal(allother? mortg_royalty)
drop allother? mortg_royalty
*/

*some ("reaggregated") variables are EXCLUDED because of possible double counting

*egen totalincome = rowtotal(total*)


* COLLAPSE MEANS & EXPORT --------------------------------------------------------------------------------
keep ssuid shhadid hhweight monthweight total* otherinc_yr

* 2 collapses - HH level & then month level

collapse (sum) total* otherinc_yr monthweight (median) med_income=totalhhinc_yr by(ssuid shhadid)
*collapse (mean) total* otherinc_yr monthweight (median) med_income=totalhhinc_yr [pw = hhweight], by(ssuid shhadid)
*collapse (mean) total* otherinc_yr (median) med_income=med_income [pw = monthweight]
*make dataset pretty for Excel export
xpose, varname clear
rename _varname varname
order varname
replace v1=round(v1)
export excel using "$out_path/balance_sheet.xlsx", sheet("sipp_income_2011", replace)
clear
* END OF COLLAPSE & EXPORT --------------------------------------------------------------------------------
*/

/*
** HOUSEHOLD level variables for CROSS-VERIFICATION
*Total HH earned income per month
gen hhtotinc_earned= thearn*12

*Total HH Income (caution: double counting)
gen hhtotinc_total = thtotinc*12

*Total HH property income per month
gen hhtotinc_property= thprpinc*12

* Govt. Transfers (HH level)
*Social Security Income
gen hhgov1 	= thsocsec*12
*Supplemental Security Income
gen hhgov2 	= thssi*12
*Unemployment Income
gen hhgov3 	= thunemp*12
*Veterans Payments
gen hhgov4 	= thvets*12
*Public assistance payments
gen gov5 	= thafdc*12
*Food stamps
gen hhgov6 	= thfdstp*12

*Other (HH level)
*Total means-tested cash transfers
gen hhallother1 = thtrninc*12
*Total 'other' HH income
gen hhallother2 = thothinc*12

*--- END OF CORE MODULE
clear */
**********************************************************************************************************
***************************************************** TOPICAL MODULES ************************************
**********************************************************************************************************
*-----------------------------------------------------------------
* TOPICAL Module (Wave 10): CURRENT Year => Sep 2011 - Dec 2011
* File name: SIPP_2008_Panel_Wave_10-Topical_Module.pdf
* ASSETS & LIABILITIES
*-----------------------------------------------------------------
*LOAD Wave 10 (Sep 2011-Dec 2011)
use sipp08t10.dta, clear

*Household & Person weights.
gen perweight 	= wpfinwgt
*Household weights (=perweight for the reference person)
gen hhweight = wpfinwgt if inlist(errp,1,2)

tempfile start
save `start', replace

* Number of members in household variable
bysort ssuid: egen hhnum = max(eppidx)
*** Maybe age is a better way of doing this sorting? It's not.
gen count_age = tage>15
bysort ssuid shhadid: egen hhnum2 = sum(count_age)

********************************* 1. Assets ************************************
* FINANCIAL ASSETS (AUDIT: Need not multiply by 3)
**Current Assets
*Joint non-interest checking a/c
gen current1 	= taljcha if tage>15
*Own non-interest checking a/c
gen current2 	= talicha if tage>15

*Joint interest earning a/c (these could be checking a/c's)
gen current3 	= tiajta if tage>15
*Own interest earning a/c
gen current4 	= tiaita if tage>15

*Bonds/US Secs - Joint
gen current5 	= timja if tage>15
*Bonds/US Secs - Own
gen current6 	= timia if tage>15
*US Savings Bonds - own
gen current7 	= talsbv if tage>15

egen totalcheck = rowtotal(current1 current2)
egen totaldeposit=rowtotal(current3 current4)
egen totalbond 	= rowtotal(current5 current6 current7)
drop current?

**Long-Term Investments (AUDIT: Need not multiply by 3)
*IRA accounts - own
gen lterm1 	= talrb if tage>15
*KEOGH accounts - own
gen lterm2 	= talkb if tage>15
*Thrift plan - own
gen lterm3 	= taltb if tage>15

*Life insurance (ALL)
gen lterm4 	= talliv if tage>15

*Equity in Investments
gen lterm5 	= toaeq if tage>15

*Stocks/Funds - Joint
gen lterm6 	= tsmjv if tage>15
*Stocks/Funds - Own
gen lterm7 	= tsmiv if tage>15

*Principal owed to you on mortgage - joint w. spouse
gen lterm8 	= tmjp if tage>15

egen totalmutfun 	= rowtotal(lterm6 lterm7)
egen totalretire 	= rowtotal(lterm1 lterm2 lterm3)
egen totallife 		= rowtotal(lterm4)
egen totaleqinv 	= rowtotal(lterm5)
egen totalloanout 	= rowtotal(lterm8)
drop lterm?

* TANGIBLE ASSETS
*Property value
gen tpropval_hh = tpropval/hhnum
gen tangible1 	= tpropval_hh //  HH level data -- duplicated
*Mobile home
gen tmhval_hh 	= tmhval/hhnum
gen tangible2 	= tmhval_hh //  HH level data -- duplicated

*Equity in other real estate (pg.86)
gen tothreva_hh	= tothreva/hhnum
gen tangible3 	= tothreva_hh  //  HH level data -- duplicated

*Car value - 1st, 2nd & 3rd
forvalues i=1(1)3 {
	local j=`i'+3
	gen tcarval`i'_hh 	=  tcarval`i'/hhnum
	gen tangible`j' 	= tcarval`i'_hh
}
*Other Vehicle value - 1st & 2nd
forvalues i=1(1)2 {
	local j = `i'+6
	gen tov`i'val_hh 	= tov`i'val/hhnum
	gen tangible`j' = tov`i'val_hh
}

*Mkt value of rental property - joint w. spouse
gen tangible9 	= trjmv if tage>15
*Mkt value of rental property - own
gen tangible10 	= trimv if tage>15
*Mkt value of rental property - joint w. others
*** AUDIT: This shouldn't be included, since "trtsha" is the relevant variable.
*gen tangible11 	= trtmv if tage>15
*Share of rental property held w. others
gen tangible12 	= trtsha if tage>15

*Business value - 1st & 2nd
forvalues i=1(1)2 {
	local j = `i'+12
	gen tangible`j' = tvbva`i' if tage>15
}

egen totalbusns 	= rowtotal(tangible13 tangible14)
egen totalprimres 	= rowtotal(tangible1 tangible2 tangible9 tangible10 tangible12)
egen totalothrrealest 	= rowtotal(tangible3)
egen totalvehic 	= rowtotal(tangible4 tangible5 tangible6 tangible7 tangible8)
drop tangible? tangible??

egen totalassets = rowtotal(total*)

* COLLAPSE MEANS & EXPORT --------------------------------------------------------------------------------
keep ssuid shhadid perweight total* hhweight

* 2 collapses - first sum within households & then average across households (use hhweight here)
*collapse (mean) total* (mean) hhweight (median) med_assets=totalassets [aw = perweight], by(ssuid shhadid)
collapse (sum) total* (mean) hhweight (median) med_assets=totalassets , by(ssuid shhadid)

collapse (mean) total* (median) med_assets=med_assets [aw = hhweight]

*** To my knowledge, this is the correct way to calculate averages in the SIPP.

*make dataset pretty for Excel export
xpose, varname clear
rename _varname varname
order varname
replace v1=round(v1)
export excel using "$out_path/balance_sheet.xlsx", sheet("sipp_assets_2011", replace)
* END OF COLLAPSE & EXPORT --------------------------------------------------------------------------------
* UNKNOWN ASSETS
*Total Net Worth (calculated variable - POSSIBLE double counting)
*gen unknown1 	= thhtnw if thhtnw!=0 // HH level data
*Total Wealth (calculated variable - POSSIBLE double counting)
*gen unknown2 	= thhtwlth if thhtwlth!=0 // HH level
clear



*-----------------------------------------------------------------
*LOAD Wave 10 (Sep 2011-Dec 2011)
use sipp08t10.dta, clear

*Household & Person weights.
gen perweight 	= wpfinwgt
*Household weights (=perweight for the reference person)
gen hhweight = wpfinwgt if inlist(errp,1,2)

tempfile start
save `start', replace

* Number of members in household variable
bysort ssuid: egen hhnum = max(eppidx)

********************************* 2. Liabilities *******************************
/* most of these are joint debt (with spouse).
so they're divided by 2 and copied to both spouses records.
careful while using weights. */

* REVOLVING DEBT      (AUDIT: Need not multiply by 3)

*store bills or credit cards - joint
gen revdebt1 	= taljdab if tage>15 // divided by TWO and added to spouse's record
*store bills or credit cards - own
gen revdebt2 	= talidab if tage>15
*** Audit (by Giri): This is actually total revolving debt, since it includes store bills as well.
*egen totalcreditcard 	= rowtotal(revdebt?)
egen totalrevdebt 	= rowtotal(revdebt?)
drop revdebt?

* NON-REVOLVING DEBT
*Loans owed with spouse (other than car/home equity loans)
gen nonrevdebt1 = taljdal if tage>15 // divided by TWO and added to spouse's record
*Other Debt owed with spouse
gen nonrevdebt2 = taljdao if tage>15 // divided by TWO and added to spouse's record
*Loans owed - own
gen nonrevdebt3 = talidal if tage>15
*Other debt - own
/* (INCLUDING: medical bills, money owed to pvt. individuals, educational loans, other debt
EXCLUDING: mortgages, home equity loans, and car loans) */
gen nonrevdebt4 = talidao if tage>15
*Principal on all loans
gen tmor1pr_hh 	= tmor1pr/hhnum
gen nonrevdebt5 = tmor1pr_hh if tage>15 // HH level data -- duplicated

*Mortgage on mobile home
*gen nonrevdebt6 = thomeamt if tage>15
***Mistake: This needs to be in Expenditures

*Amt owed for vehicle - 1st, 2nd & 3rd (nonrevdebt7-9)
forvalues i=1(1)3 {
	local j=`i'+6
	gen ta`i'amt_hh 	= ta`i'amt/hhnum
	gen nonrevdebt`j' 	= ta`i'amt_hh // HH level data -- duplicated
}
*Amt owed for Other vehicle - 1st & 2nd (nonrevdebt10-11)
forvalues i=1(1)2 {
	local j=`i'+9
	gen tov`i'amt_hh 	= tov`i'amt/hhnum
	gen nonrevdebt`j' 	= tov`i'amt_hh // HH level data -- duplicated
}

*Debt on Stocks/MFs - Joint
gen nonrevdebt12= tsmjmav if tage>15 // divided by TWO and added to spouse's record
*Debt on Stocks/MFs - Own
gen nonrevdebt13= tsmimav if tage>15 

*Principal owed on rental property - joint w. spouse
* AUDIT: (by Giri) This number is duplicated to the spouse's records only, just like the others.
* Although, the documentation does not mention this.
gen nonrevdebt14= trjpri if tage>15 & inlist(errp,1,2)
*Principal owed on rental property - own
gen nonrevdebt15= tripri if tage>15
*Principal owed on rental property - joint w. others
gen nonrevdebt16= trtpri if tage>15

*Business debt - 1st & 2nd (nonrevdebt18-19)
forvalues i=1(1)2 {
	local j = `i'+17
	gen nonrevdebt`j' = tvbde`i' if tage>15
}

egen totalotherdebt 	= rowtotal(nonrevdebt1 nonrevdebt2 nonrevdebt3 nonrevdebt4 nonrevdebt5)
* AUDIT: (Giri) We don't include nonrevdebt 14-16 otherwise we would be double counting.
* Theoretically, "Principal owed on all mortgages or loans" should include "Principal owed on joint rental property".
egen totalvehicle 	= rowtotal(nonrevdebt7 nonrevdebt8 nonrevdebt9 nonrevdebt10 nonrevdebt11)
egen totalinvestment 	= rowtotal(nonrevdebt12 nonrevdebt13)
egen totalbusiness 	= rowtotal(nonrevdebt18 nonrevdebt19)
drop nonrevdebt? nonrevdebt??

egen totalliabilities 	= rowtotal(total*)

* COLLAPSE MEANS & EXPORT --------------------------------------------------------------------------------
keep ssuid shhadid perweight total* hhweight

* 2 collapses - HH level & then month level
collapse (sum) total* (mean) hhweight (median) med_liabilities=totalliabilities , by(ssuid shhadid)

collapse (mean) total* (median) med_liabilities=med_liabilities [aw = hhweight]
*make dataset pretty for Excel export
xpose, varname clear
rename _varname varname
order varname
replace v1=round(v1)
export excel using "$out_path/balance_sheet.xlsx", sheet("sipp_liabilities_2011", replace)
* END OF COLLAPSE & EXPORT --------------------------------------------------------------------------------
clear



*-----------------------------------------------------------------
*LOAD Wave 10 (Sep 2011-Dec 2011)
use sipp08t10.dta, clear

*Household & Person weights.
gen perweight 	= wpfinwgt
*Household weights (=perweight for the reference person)
gen hhweight = wpfinwgt if inlist(errp,1,2)

tempfile start
save `start', replace


********************************* 4. Expenditure ******************************* (NOT COMPREHENSIVE)
*Rent/Mortgage/Utility - person 1, 2 & 3
forvalues i=1(1)3 {
	gen rent`i' = tpersam`i'*12 if tage>15
}
*Monthly rent/mortgage
gen rentmortgage = thomeamt*12 if tage>15

egen totalhousing= rowtotal(rent? rentmortgage)
drop rent?

*Utility bills (Electricity, Gas, Telephone, Other Utilities)
gen utility 	= tutils*12 if tage>15 // excluded because otherwise double counting with tpersam`i'

*Work expenses
gen work 	= epvanexp if tage>15

egen totalmisc 	= rowtotal(work)
drop work

*Commuting expenses
gen commute 	= epvcomut*52 if tage>15
*Parking/Toll expenses
gen parkingtoll = epvpaywk*52 if tage>15

egen totaltransp= rowtotal(commute parkingtoll)
drop commute parkingtoll

*Child care expenses
forvalues i 	= 1(1)4 {
	gen childcare`i' = tpvccfp`i'*52 if tage>15
}
*Child support expenses
forvalues i 	= 1(1)4 {
	gen childsupport`i' = tpvchpa`i'*12 if tage>15
}

egen totalpersonalcare 	= rowtotal(childcare? childsupport?)
drop childcare? childsupport?

egen totalexpenditure 	= rowtotal(total*)

* COLLAPSE MEANS & EXPORT --------------------------------------------------------------------------------
keep ssuid shhadid perweight total* epppnum hhweight

* 2 collapses - sum within HH level & then average across households
collapse (sum) total* (mean) perweight hhweight (median) med_expenses=totalexpenditure , by(ssuid shhadid)

collapse (mean) total* (median) med_expenses=med_expenses [aw = hhweight]

*make dataset pretty for Excel export
xpose, varname clear
rename _varname varname
order varname
replace v1=round(v1)
export excel using "$out_path/balance_sheet.xlsx", sheet("sipp_expenditure_2011", replace)
* END OF COLLAPSE & EXPORT --------------------------------------------------------------------------------

clear


********************************* 4. Taxes ******************************* (NOT COMPREHENSIVE)
*LOAD Wave 7 (JAN 2011-Apr 2011)
use sipp08t8.dta, clear

*Household & Person weights.
gen perweight 	= wpfinwgt
*Household weights (=perweight for the reference person)
gen hhweight = wpfinwgt if inlist(errp,1,2)

tempfile start
save `start', replace

*Note: Taxes in the SIPP are reported in bins so we take the midpoints. Also, these pertain to the year 2009.

*Income Tax (Net tax liability in 2009; Form 1040/EZ)
* (-1:don't know/refused/none/losses & 0:not answered)
gen tax_income 		= 100 if tnettax==1
replace tax_income 	= 450 if tnettax==2
replace tax_income 	= 950 if tnettax==3
replace tax_income 	= 1600 if tnettax==4
replace tax_income 	= 2500 if tnettax==5
replace tax_income 	= 3500 if tnettax==6
replace tax_income 	= 5000 if tnettax==7
replace tax_income 	= 7000 if tnettax==8
replace tax_income 	= 9500 if tnettax==9
replace tax_income 	= 13500 if tnettax==10
replace tax_income 	= 21500 if tnettax==11
replace tax_income 	= 27000 if tnettax==12
*replace tax_income 	= 0 if tnettax==-1

*Property Tax (Property tax bill for residence in 2009)
* (-1:don't know/refused/none/losses & 0:not answered)
gen tax_property  	= 100 if ttaxbill==1
replace tax_property 	= 450 if ttaxbill==2
replace tax_property 	= 950 if ttaxbill==3
replace tax_property 	= 1600 if ttaxbill==4
replace tax_property 	= 2500 if ttaxbill==5
replace tax_property 	= 3500 if ttaxbill==6
replace tax_property 	= 5000 if ttaxbill==7
replace tax_property 	= 7000 if ttaxbill==8
replace tax_property 	= 9500 if ttaxbill==9
replace tax_property 	= 13500 if ttaxbill==10
replace tax_property 	= 21500 if ttaxbill==11
replace tax_property 	= 27000 if ttaxbill==12
*replace tax_property 	= 0 if ttaxbill==-1

egen totaltaxes 	= rowtotal(tax*)

* COLLAPSE MEANS & EXPORT --------------------------------------------------------------------------------
keep ssuid shhadid perweight epppnum hhweight  total* tax_income tax_property

* 2 collapses - sum within HH level & then average across households
collapse (sum) tax_income tax_property totaltax (mean) perweight hhweight (median) med_taxes=totaltaxes, by(ssuid shhadid)

collapse (mean) tax_income tax_property totaltax (median) med_taxes=totaltaxes [aw = hhweight]

*make dataset pretty for Excel export
xpose, varname clear
rename _varname varname
order varname
replace v1=round(v1)
export excel using "$out_path/balance_sheet.xlsx", sheet("sipp_taxes_2011", replace)
* END OF COLLAPSE & EXPORT --------------------------------------------------------------------------------

clear
* end of Do-file
